Instacart

Read in the data

# Load the `instacart` dataset into the workspace.
# NOTE(review): assumes the package that ships `instacart` is already
# attached elsewhere in this document — confirm.
data("instacart")

# Convert to a tibble. The data is passed exactly once: combining a pipe with
# an explicit `instacart` argument would supply the data frame a second time,
# where it is matched to another parameter of `as_tibble()` rather than ignored.
instacart <- as_tibble(instacart)

Clean the dataset

# Build the analysis table used by every chart:
#   1. normalise column names,
#   2. turn the numeric `order_dow` into a labelled weekday,
#   3. keep only the listed columns,
#   4. discard rows with a missing value in any kept column.
instacart_tidy <-
  instacart %>%
  janitor::clean_names() %>%
  mutate(
    # `order_dow` is shifted by one before labelling
    # (presumably it is coded 0-6 — verify against the data dictionary).
    order_day_of_week = lubridate::wday(order_dow + 1, label = TRUE)
  ) %>%
  select(
    order_id, user_id, reordered,
    order_day_of_week, order_hour_of_day, days_since_prior_order,
    product_name, aisle, department
  ) %>%
  drop_na()

Column

Chart A

The first chart is a line plot showing the distribution of order times over the course of a day, with one line per day of the week.

# Chart A: number of distinct orders per hour of the day, drawn as one line
# per day of the week.
instacart_tidy %>%
  group_by(order_day_of_week, order_hour_of_day) %>%
  # `.groups = "drop"` returns an ungrouped summary, so no "grouped output"
  # message is emitted into the rendered dashboard and no grouping leaks
  # into the plotting step.
  summarize(count = n_distinct(order_id), .groups = "drop") %>%
  plot_ly(
    x = ~order_hour_of_day, y = ~count,
    type = "scatter", mode = "lines",
    color = ~order_day_of_week, alpha = 0.8
  ) %>%
  layout(
    title = "Distributions of Order Time During a Day",
    xaxis = list(title = "Time"),
    yaxis = list(title = "Number of Orders")
  )

Column

Chart B

The second chart is a bar chart showing the 10 aisles with the greatest number of items ordered.

# The 10 aisles with the most items ordered (one row of `instacart_tidy` per
# item), with aisle names converted to title case for display.
aisle_10 <- instacart_tidy %>%
  count(aisle, name = "count") %>%
  mutate(aisle = str_to_title(aisle)) %>%
  # Rank explicitly on `count`; `slice_max()` keeps ties and returns rows in
  # descending order, replacing the superseded `top_n()` that guessed the
  # ranking column and printed a "Selecting by ..." message.
  slice_max(count, n = 10)
# Chart B: bar chart of item counts for the top aisles, bars ordered from the
# largest count to the smallest.
aisle_10 %>%
  mutate(aisle = fct_reorder(aisle, count, .desc = TRUE)) %>%
  plot_ly(
    x = ~aisle, y = ~count, color = ~aisle, type = "bar",
    # The default qualitative palette only has 8 classes, so colouring 10
    # aisles triggered RColorBrewer warnings; viridis scales to any number
    # of categories.
    colors = "viridis"
  ) %>%
  layout(
    title = "Number of Items Ordered in Top 10 Popular Aisles",
    xaxis = list(title = "Aisle"),
    yaxis = list(title = "Number of items ordered")
  )

Column

Chart C

The third chart is a boxplot showing the distribution of order intervals (days since the prior order) in the 10 most popular aisles.

# Title-cased names of the top aisles, used to filter the full table below.
aisle_name_10 <- pull(aisle_10, aisle)

# Chart C: boxplots of days since the prior order for items in the top
# aisles, with aisles ordered by their `days_since_prior_order` values.
instacart_tidy %>%
  # Title-case first so the names match those stored in `aisle_name_10`.
  mutate(aisle = str_to_title(aisle)) %>%
  filter(aisle %in% aisle_name_10) %>%
  mutate(aisle = fct_reorder(aisle, days_since_prior_order)) %>%
  plot_ly(
    x = ~aisle, y = ~days_since_prior_order, type = "box",
    color = ~aisle, alpha = 0.5,
    # The default qualitative palette only has 8 classes, so colouring 10
    # aisles triggered RColorBrewer warnings; viridis scales to any number
    # of categories.
    colors = "viridis"
  ) %>%
  layout(
    title = "Distributions of Order Interval in the Top 10 Popular Aisles",
    xaxis = list(title = "Aisle"),
    yaxis = list(title = "Days Since Prior Order")
  )